Correlation matrix and plots

Model autosearch

# Working data set for the automatic model search: `df` without its
# 1st, 2nd and 4th columns.
dropped_cols <- c(1, 2, 4)
df_sub <- df[, -dropped_cols]
str(df_sub)

# NOTE(review): the response is spelled `df_sub$ARPU` (not plain `ARPU`) in
# both formulas; it is kept exactly as-is so the stored model calls and the
# labels printed downstream stay the same.

# Log-response model on all remaining columns, followed by a stepwise search
# in both directions with penalty k = 2 (AIC).
m01 <- lm(log(df_sub$ARPU + 1) ~ ., data = df_sub)
m01AIC <- step(m01, direction = "both", k = 2)

# Same search with the untransformed response.
m02 <- lm(df_sub$ARPU ~ ., data = df_sub)
m02AIC <- step(m02, direction = "both", k = 2)
print("==========")

Models considered

## 
## Calls:
## m01AIC: lm(formula = log(df_sub$ARPU + 1) ~ Brand.total + DiD, data = df_sub)
## m02AIC: lm(formula = df_sub$ARPU ~ Brand.total + Stables.total + DiD, 
##     data = df_sub)
## m10: lm(formula = I(log(ARPU + 1)) ~ spread_moex + USDT + Ставка + 
##     time_period + time_period + Group + DiD, data = df)
## m11: lm(formula = I(log(ARPU + 1)) ~ spread_moex + USDT + Ставка + 
##     time_period + I(Brand.total) + I(Stables.total) + time_period + 
##     Group + DiD, data = df)
## m12: lm(formula = I(log(ARPU + 1)) ~ spread_moex + USDT + Ставка + 
##     time_period + I(log(Brand.total)) + I(log(Stables.total)) + 
##     time_period + Group + DiD, data = df)
## m13: lm(formula = ARPU ~ spread_moex + USDT + Ставка + time_period + 
##     I(log(Brand.total)) + I(log(Stables.total)) + time_period + 
##     Group + DiD, data = df)
## 
## ===============================================================================================================================
##                                 m01AIC           m02AIC           m10               m11               m12             m13      
##                         --------------------- ------------ ----------------- ----------------- ----------------- ------------   
##                          log(df_sub$ARPU + 1)  df_sub$ARPU  I(log(ARPU + 1))  I(log(ARPU + 1))  I(log(ARPU + 1))     ARPU      
## -------------------------------------------------------------------------------------------------------------------------------
##   (Intercept)                  5.873***          884.138        10.893            10.020              9.759        29669.910   
##                               (0.495)          (2082.288)       (6.499)           (6.353)           (12.706)      (35423.912)  
##   Brand.total                  0.000**             0.438*                                                                      
##                               (0.000)             (0.186)                                                                      
##   DiD                          0.511*           1682.661**       0.759             0.759              0.759         1599.458   
##                               (0.184)           (540.337)       (0.443)           (0.414)            (0.411)       (1146.297)  
##   Stables.total                                   -0.051                                                                       
##                                                   (0.036)                                                                      
##   spread_moex                                                    0.062             0.142              0.143          511.151   
##                                                                 (0.203)           (0.211)            (0.205)        (570.598)  
##   USDT                                                           0.031            -0.009             -0.004           -6.429   
##                                                                 (0.091)           (0.087)            (0.086)        (239.143)  
##   Ставка                                                        -0.006            -0.004             -0.004          -10.572   
##                                                                 (0.004)           (0.005)            (0.005)         (12.623)  
##   time_period                                                   -1.144            -0.293             -0.311          -69.666   
##                                                                 (0.606)           (0.692)            (0.674)       (1880.255)  
##   Group                                                         -0.262            -0.262             -0.262         -116.372   
##                                                                 (0.369)           (0.345)            (0.342)        (953.777)  
##   I(Brand.total)                                                                   0.000                                       
##                                                                                   (0.000)                                      
##   I(Stables.total)                                                                -0.000                                       
##                                                                                   (0.000)                                      
##   I(log(Brand.total))                                                                                 1.544*        3753.891   
##                                                                                                      (0.708)       (1974.495)  
##   I(log(Stables.total))                                                                              -1.225        -5116.024   
##                                                                                                      (0.972)       (2709.694)  
## -------------------------------------------------------------------------------------------------------------------------------
##   AIC                         36.647             450.425        47.797            45.396             44.983          457.502   
##   BIC                         41.680             456.716        57.862            57.977             57.564          470.083   
##   adj. R-squared               0.354               0.306         0.118             0.229              0.241            0.198   
##   N                           26                  26            26                26                 26               26       
## ===============================================================================================================================
##   Significance: *** = p < 0.001; ** = p < 0.01; * = p < 0.05

Residuals vs fitted values

Check normality assumption

## Warning: Ignoring unknown parameters: main

## [[1]]
## [1] "m01AIC"
## 
## 
##  Jarque-Bera Normality Test
## 
## data:  as.vector(m1$residuals)
## JB = 2.0464, p-value = 0.3594
## alternative hypothesis: greater
## 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1342492       2.21838   0.648
##  Alternative hypothesis: rho != 0
## 
##  RESET test
## 
## data:  m1
## RESET = 0.093995, df1 = 1, df2 = 22, p-value = 0.762
## 
## [1] "======================================"
## Warning: Ignoring unknown parameters: main

## [[1]]
## [1] "m02AIC"
## 
## 
##  Jarque-Bera Normality Test
## 
## data:  as.vector(m1$residuals)
## JB = 29.119, p-value = 4.751e-07
## alternative hypothesis: greater
## 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1751643      2.264155   0.664
##  Alternative hypothesis: rho != 0
## 
##  RESET test
## 
## data:  m1
## RESET = 0.48333, df1 = 1, df2 = 21, p-value = 0.4945
## 
## [1] "======================================"
## Warning: Ignoring unknown parameters: main

## Warning: Computation failed in `stat_bin()`:
## attempt to apply non-function

## [[1]]
## [1] "m10"
## 
## 
##  Jarque-Bera Normality Test
## 
## data:  as.vector(m1$residuals)
## JB = 0.11476, p-value = 0.9442
## alternative hypothesis: greater
## 
##  lag Autocorrelation D-W Statistic p-value
##    1       0.1867099      1.548469   0.138
##  Alternative hypothesis: rho != 0
## 
##  RESET test
## 
## data:  m1
## RESET = 0.84365, df1 = 1, df2 = 18, p-value = 0.3705
## 
## [1] "======================================"
## Warning: Ignoring unknown parameters: main

## Warning: Computation failed in `stat_bin()`:
## attempt to apply non-function

## [[1]]
## [1] "m11"
## 
## 
##  Jarque-Bera Normality Test
## 
## data:  as.vector(m1$residuals)
## JB = 0.060428, p-value = 0.9702
## alternative hypothesis: greater
## 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1474577      2.222687   0.946
##  Alternative hypothesis: rho != 0
## 
##  RESET test
## 
## data:  m1
## RESET = 0.078645, df1 = 1, df2 = 16, p-value = 0.7827
## 
## [1] "======================================"
## Warning: Ignoring unknown parameters: main

## Warning: Computation failed in `stat_bin()`:
## attempt to apply non-function

## [[1]]
## [1] "m12"
## 
## 
##  Jarque-Bera Normality Test
## 
## data:  as.vector(m1$residuals)
## JB = 0.058093, p-value = 0.9714
## alternative hypothesis: greater
## 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.1559159      2.239441   0.928
##  Alternative hypothesis: rho != 0
## 
##  RESET test
## 
## data:  m1
## RESET = 0.19566, df1 = 1, df2 = 16, p-value = 0.6642
## 
## [1] "======================================"
## Warning: Ignoring unknown parameters: main

## [[1]]
## [1] "m13"
## 
## 
##  Jarque-Bera Normality Test
## 
## data:  as.vector(m1$residuals)
## JB = 12.402, p-value = 0.002027
## alternative hypothesis: greater
## 
##  lag Autocorrelation D-W Statistic p-value
##    1      -0.2894041      2.489903   0.532
##  Alternative hypothesis: rho != 0
## 
##  RESET test
## 
## data:  m1
## RESET = 4.2922, df1 = 1, df2 = 16, p-value = 0.05481
## 
## [1] "======================================"

Autocorrelation

# Lag-1 autocorrelation diagnostics for every fitted model in `names_m_i`.
# Plot titles / labels come from the parallel collection `names_m`
# (indexed with `[` exactly as before). For each model this produces:
#   1. a scatter plot of the studentized residuals against their first lag,
#      with red reference lines at the residual median;
#   2. a line-and-point plot of the studentized residuals against `df$time`;
#   3. the printed sample correlation between e_t and e_{t-1}.
for (idx in seq_along(names_m_i)) {
  fit <- names_m_i[[idx]]
  model_label <- names_m[idx]

  # Studentized residuals are computed once and reused below.
  stud_res <- studres(fit)
  n_obs <- length(stud_res)
  res_median <- median(stud_res)

  # Pair e_t (observations 2..n) with e_{t-1} (observations 1..n-1).
  # Explicit column names replace the auto-generated `ts4.c..1..` name,
  # which silently changed whenever the subsetting expression was edited.
  df_res <- data.frame(
    current = unname(stud_res[-1]),
    lagged = unname(stud_res[-n_obs])
  )

  plot(
    df_res$current, df_res$lagged,
    col = "blue3", pch = 20, main = model_label,
    xlab = "Studentized residual (t)",
    ylab = "Studentized residual (t - 1)"
  )
  abline(h = res_median, col = "red2")
  abline(v = res_median, col = "red2")

  # NOTE(review): `df$time` is kept as written; `$` on a data.frame allows
  # partial name matching, so confirm which column this actually resolves to.
  df_res_t <- data.frame(time = df$time, stud_res = stud_res)
  print(
    ggplot(df_res_t, aes(x = time, y = stud_res)) +
      geom_line(col = "blue3", alpha = 0.85, size = 0.8) +
      ylab("Studentized residual") +
      xlab(model_label) +
      geom_point(size = 3)
  )

  print("corr coef: ")
  print(cor(df_res$current, df_res$lagged))
}

## [1] "corr coef: "
## [1] -0.1276279

## [1] "corr coef: "
## [1] -0.151565

## [1] "corr coef: "
## [1] 0.1432378

## [1] "corr coef: "
## [1] -0.1768364

## [1] "corr coef: "
## [1] -0.1833807

## [1] "corr coef: "
## [1] -0.2785566
# HAC covariance matrix of the m11 coefficient estimates, rendered as an
# interactive table.
hac_cov_m11 <- vcovHAC(m11)
datatable(hac_cov_m11)

# Sample autocorrelation function of the m11 residuals.
acf(x = m11$residuals, type = "correlation")

Heteroscedasticity

Goldfeld-Quandt test

## [[1]]
## [1] "m01AIC"
## 
## 
##  Goldfeld-Quandt test
## 
## data:  m1
## GQ = 0.83892, df1 = 8, df2 = 7, p-value = 0.5983
## alternative hypothesis: variance increases from segment 1 to 2
## 
## [[1]]
## [1] "m02AIC"
## 
## 
##  Goldfeld-Quandt test
## 
## data:  m1
## GQ = 2.1326, df1 = 7, df2 = 6, p-value = 0.1875
## alternative hypothesis: variance increases from segment 1 to 2
## 
## [[1]]
## [1] "m10"
## 
## 
##  Goldfeld-Quandt test
## 
## data:  m1
## GQ = 2.733, df1 = 4, df2 = 3, p-value = 0.2175
## alternative hypothesis: variance increases from segment 1 to 2
## 
## [[1]]
## [1] "m11"
## 
## 
##  Goldfeld-Quandt test
## 
## data:  m1
## GQ = 0.88623, df1 = 2, df2 = 1, p-value = 0.6006
## alternative hypothesis: variance increases from segment 1 to 2
## 
## [[1]]
## [1] "m12"
## 
## 
##  Goldfeld-Quandt test
## 
## data:  m1
## GQ = 0.88623, df1 = 2, df2 = 1, p-value = 0.6006
## alternative hypothesis: variance increases from segment 1 to 2
## 
## [[1]]
## [1] "m13"
## 
## 
##  Goldfeld-Quandt test
## 
## data:  m1
## GQ = 0.72595, df1 = 2, df2 = 1, p-value = 0.6386
## alternative hypothesis: variance increases from segment 1 to 2

Multicollinearity

Condition number, VIF

## 
## Call:
## lm(formula = log(df_sub$ARPU + 1) ~ Brand.total + DiD, data = df_sub)
## 
## Coefficients:
## (Intercept)  Brand.total          DiD  
##   5.8727770    0.0001955    0.5106329  
## 
## [1] "CN: "
## [1] 16780.49
## [1] "VIF: "
## Brand.total         DiD 
##    1.003267    1.003267 
## [1] "==========================="
## 
## Call:
## lm(formula = df_sub$ARPU ~ Brand.total + Stables.total + DiD, 
##     data = df_sub)
## 
## Coefficients:
##   (Intercept)    Brand.total  Stables.total            DiD  
##     884.13842        0.43848       -0.05091     1682.66076  
## 
## [1] "CN: "
## [1] 135118.7
## [1] "VIF: "
##   Brand.total Stables.total           DiD 
##      1.027484      1.107771      1.092117 
## [1] "==========================="
## 
## Call:
## lm(formula = I(log(ARPU + 1)) ~ spread_moex + USDT + Ставка + 
##     time_period + time_period + Group + DiD, data = df)
## 
## Coefficients:
## (Intercept)  spread_moex         USDT       Ставка  time_period        Group  
##   10.893212     0.062414     0.030834    -0.006337    -1.144205    -0.261930  
##         DiD  
##    0.759081  
## 
## [1] "CN: "
## [1] 5683.098
## [1] "VIF: "
## spread_moex        USDT      Ставка time_period       Group         DiD 
##    4.630041    1.441197   12.401259    7.470051    3.250000    4.250000 
## [1] "==========================="
## 
## Call:
## lm(formula = I(log(ARPU + 1)) ~ spread_moex + USDT + Ставка + 
##     time_period + I(Brand.total) + I(Stables.total) + time_period + 
##     Group + DiD, data = df)
## 
## Coefficients:
##      (Intercept)       spread_moex              USDT            Ставка  
##        1.002e+01         1.423e-01        -8.908e-03        -3.839e-03  
##      time_period    I(Brand.total)  I(Stables.total)             Group  
##       -2.935e-01         1.963e-04        -2.102e-05        -2.619e-01  
##              DiD  
##        7.591e-01  
## 
## [1] "CN: "
## [1] 383606.1
## [1] "VIF: "
##      spread_moex             USDT           Ставка      time_period 
##         5.702496         1.514602        16.536084        11.139683 
##   I(Brand.total) I(Stables.total)            Group              DiD 
##         1.716091         1.968835         3.250000         4.250000 
## [1] "==========================="
## 
## Call:
## lm(formula = I(log(ARPU + 1)) ~ spread_moex + USDT + Ставка + 
##     time_period + I(log(Brand.total)) + I(log(Stables.total)) + 
##     time_period + Group + DiD, data = df)
## 
## Coefficients:
##           (Intercept)            spread_moex                   USDT  
##              9.758794               0.142780              -0.003689  
##                Ставка            time_period    I(log(Brand.total))  
##             -0.004029              -0.310692               1.544391  
## I(log(Stables.total))                  Group                    DiD  
##             -1.224944              -0.261930               0.759081  
## 
## [1] "CN: "
## [1] 9095.894
## [1] "VIF: "
##           spread_moex                  USDT                Ставка 
##              5.468535              1.492173             15.765233 
##           time_period   I(log(Brand.total)) I(log(Stables.total)) 
##             10.762144              1.668016              2.004922 
##                 Group                   DiD 
##              3.250000              4.250000 
## [1] "==========================="
## 
## Call:
## lm(formula = ARPU ~ spread_moex + USDT + Ставка + time_period + 
##     I(log(Brand.total)) + I(log(Stables.total)) + time_period + 
##     Group + DiD, data = df)
## 
## Coefficients:
##           (Intercept)            spread_moex                   USDT  
##             29669.910                511.151                 -6.429  
##                Ставка            time_period    I(log(Brand.total))  
##               -10.572                -69.666               3753.891  
## I(log(Stables.total))                  Group                    DiD  
##             -5116.024               -116.372               1599.458  
## 
## [1] "CN: "
## [1] 30639.87
## [1] "VIF: "
##           spread_moex                  USDT                Ставка 
##              5.468535              1.492173             15.765233 
##           time_period   I(log(Brand.total)) I(log(Stables.total)) 
##             10.762144              1.668016              2.004922 
##                 Group                   DiD 
##              3.250000              4.250000 
## [1] "==========================="

Significance of coefficients with HAC correction for m01AIC

## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## (Intercept) 5.8728e+00 3.9597e-01 14.8314 2.899e-13 ***
## Brand.total 1.9550e-04 5.3126e-05  3.6799  0.001241 ** 
## DiD         5.1063e-01 2.1418e-01  2.3841  0.025756 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1